module analysis

data preparation

new202310 rename:

B1: AD:/storage/xuhepingLab/0.share/projects/20230529_SS2_LYN
B2: GBM:/storage/hedanyangLab/zhaojinghong/projects/25-LYN.20221212.bulk
B3: EAE:/storage/hedanyangLab/zhaojinghong/projects/31-LYN.20230106.bulk
B4: cuprizone:/storage/hedanyangLab/zhaojinghong/projects/17-LYN.20221010.bulk
B5: SIMPLE: /storage/hedanyangLab/zhaojinghong/projects/37-LYN.20230219.bulk

Ctrl as C
X_Spp1_N as N
X_Spp1_P as P

test counts

load_mat <- function(dat){
  mat_raw <- read.table(dat, header = TRUE, stringsAsFactors = F, sep = "\t")
  rownames(mat_raw) <- mat_raw$gene_id
  mat_raw <- mat_raw[,2:ncol(mat_raw)]
  mat_raw <- as.matrix(mat_raw)
  mat_raw <- edgeR::cpm(mat_raw)
  mat_raw <- round(mat_raw)
  list_pc <- 'I:/Shared_win/genomics/mouse/GRCm38_vM25/gtf_detail/list_pc.lv1_2'
  id_pc <- as.vector(unlist(read.table(list_pc)))
  mat_pc <- mat_raw[id_pc,]
  return(mat_pc)
}
mat_pc <- list(B1=load_mat("../output_new/AD/RNAseq.20230530_SS2_LYN.counts.gene.matrix"),
                 B2=load_mat("../output_new/GBM/RNAseq.LYN.20221212.counts.gene.matrix"),
               B3=load_mat("../output_new/EAE/RNAseq.LYN.20230106.counts.gene.matrix"),
               B4=load_mat("../output_new/cuprizone/RNAseq.LYN.20221010.Spp1.counts.gene.matrix"),
               B5=load_mat("../output_new/SIMPLE/RNAseq.LYN.20230219.counts.gene.matrix"))
lapply(mat_pc, dim)
## $B1
## [1] 21649    12
## 
## $B2
## [1] 21649    12
## 
## $B3
## [1] 21649     9
## 
## $B4
## [1] 21649    10
## 
## $B5
## [1] 21649    12
lapply(mat_pc, colnames)
## $B1
##  [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
##  [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
## 
## $B2
##  [1] "Ctrl_1"       "Ctrl_2"       "Ctrl_3"       "Ctrl_4"       "GBM_1_Spp1_N"
##  [6] "GBM_1_Spp1_P" "GBM_2_Spp1_N" "GBM_2_Spp1_P" "GBM_3_Spp1_N" "GBM_3_Spp1_P"
## [11] "GBM_4_Spp1_N" "GBM_4_Spp1_P"
## 
## $B3
## [1] "Ctrl_1"      "Ctrl_2"      "Ctrl_3"      "EAE_1_GFP_N" "EAE_1_GFP_P"
## [6] "EAE_2_GFP_N" "EAE_2_GFP_P" "EAE_3_GFP_N" "EAE_3_GFP_P"
## 
## $B4
##  [1] "Ctrl_1"      "Ctrl_2"      "Ctrl_3"      "Ctrl_4"      "CZ_1_Spp1_N"
##  [6] "CZ_1_Spp1_P" "CZ_2_Spp1_N" "CZ_2_Spp1_P" "CZ_3_Spp1_N" "CZ_3_Spp1_P"
## 
## $B5
##  [1] "C.1.GFP.N" "C.2.GFP.N" "C.3.GFP.N" "C.4.GFP.N" "S.1.GFP.N" "S.1.GFP.P"
##  [7] "S.2.GFP.N" "S.2.GFP.P" "S.3.GFP.N" "S.3.GFP.P" "S.4.GFP.N" "S.4.GFP.P"
# reorder as C:N:P
#mat_pc$B1 <- mat_pc$B1[,c(grep("Ctrl",colnames(mat_pc$B1)),
#                          grep("Spp1_N",colnames(mat_pc$B1)),
#                          grep("Spp1_P",colnames(mat_pc$B1)),)]

mat_pc <- lapply(mat_pc, function(mat){
  
  mat[,c(grep("^Ctrl|^C...GFP.N|B1.C",colnames(mat),perl = T),
         grep("_N$|^S...GFP.N|B1.N",colnames(mat),perl = T),
         grep("_P$|^S...GFP.P|B1.P",colnames(mat),perl = T))]
})
lapply(mat_pc, dim)
## $B1
## [1] 21649    12
## 
## $B2
## [1] 21649    12
## 
## $B3
## [1] 21649     9
## 
## $B4
## [1] 21649    10
## 
## $B5
## [1] 21649    12
lapply(mat_pc, colnames)
## $B1
##  [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
##  [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
## 
## $B2
##  [1] "Ctrl_1"       "Ctrl_2"       "Ctrl_3"       "Ctrl_4"       "GBM_1_Spp1_N"
##  [6] "GBM_2_Spp1_N" "GBM_3_Spp1_N" "GBM_4_Spp1_N" "GBM_1_Spp1_P" "GBM_2_Spp1_P"
## [11] "GBM_3_Spp1_P" "GBM_4_Spp1_P"
## 
## $B3
## [1] "Ctrl_1"      "Ctrl_2"      "Ctrl_3"      "EAE_1_GFP_N" "EAE_2_GFP_N"
## [6] "EAE_3_GFP_N" "EAE_1_GFP_P" "EAE_2_GFP_P" "EAE_3_GFP_P"
## 
## $B4
##  [1] "Ctrl_1"      "Ctrl_2"      "Ctrl_3"      "Ctrl_4"      "CZ_1_Spp1_N"
##  [6] "CZ_2_Spp1_N" "CZ_3_Spp1_N" "CZ_1_Spp1_P" "CZ_2_Spp1_P" "CZ_3_Spp1_P"
## 
## $B5
##  [1] "C.1.GFP.N" "C.2.GFP.N" "C.3.GFP.N" "C.4.GFP.N" "S.1.GFP.N" "S.2.GFP.N"
##  [7] "S.3.GFP.N" "S.4.GFP.N" "S.1.GFP.P" "S.2.GFP.P" "S.3.GFP.P" "S.4.GFP.P"
names(mat_pc)
## [1] "B1" "B2" "B3" "B4" "B5"
# rename
for(NN in names(mat_pc)){
  
  colnames(mat_pc[[NN]]) <- c(paste0(paste0(NN,".C."),1:length(grep("^Ctrl|^C...GFP.N|B1.C",colnames(mat_pc[[NN]]),perl = T))),
                              paste0(paste0(NN,".N."),1:length(grep("_N$|^S...GFP.N|B1.N",colnames(mat_pc[[NN]]),perl = T))),
                              paste0(paste0(NN,".P."),1:length(grep("_P$|^S...GFP.P|B1.P",colnames(mat_pc[[NN]]),perl = T))))

}
lapply(mat_pc, colnames)
## $B1
##  [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
##  [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
## 
## $B2
##  [1] "B2.C.1" "B2.C.2" "B2.C.3" "B2.C.4" "B2.N.1" "B2.N.2" "B2.N.3" "B2.N.4"
##  [9] "B2.P.1" "B2.P.2" "B2.P.3" "B2.P.4"
## 
## $B3
## [1] "B3.C.1" "B3.C.2" "B3.C.3" "B3.N.1" "B3.N.2" "B3.N.3" "B3.P.1" "B3.P.2"
## [9] "B3.P.3"
## 
## $B4
##  [1] "B4.C.1" "B4.C.2" "B4.C.3" "B4.C.4" "B4.N.1" "B4.N.2" "B4.N.3" "B4.P.1"
##  [9] "B4.P.2" "B4.P.3"
## 
## $B5
##  [1] "B5.C.1" "B5.C.2" "B5.C.3" "B5.C.4" "B5.N.1" "B5.N.2" "B5.N.3" "B5.N.4"
##  [9] "B5.P.1" "B5.P.2" "B5.P.3" "B5.P.4"
cowplot::plot_grid(
plotlist = lapply(mat_pc,function(mat){
  
  ggplot(reshape2::melt(data.frame(mat)), aes(x=variable, y=log2(value+1))) + geom_point() +
    geom_boxplot() +
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(title = "data distribution - CPM", x="")
  
  
  
}), ncol = 2)

runPCA

pp.PCA <- function(mat,Name="PCA", Color=c("#006BD7","#D7770D","#FF4848")){
  rv <- rowVars(mat)
  selt <- order(rv, decreasing = TRUE)[seq_len(2000)]
  pca2 <- stats::prcomp(t(mat[selt,]), scale.=TRUE, center=TRUE)
  
  pca_d <- as.data.frame(pca2$x)
  
  pca_d[,"condition"] = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat),perl = T, fixed = F)
  pca_d[,"batch"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".")[[1]][1]})))
  pca_d[,"replicate"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".")[[1]][3]})))
  
  ggplot(data=pca_d, aes(x=PC1, y=PC2, color=condition)) +
    geom_point(size=3.5) +
    ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=2.5, max.overlaps = 500) + labs(title = Name) +
    scale_colour_manual(values = Color) + guides(color=guide_legend(reverse = F)) + theme_classic()
}
cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA, Name="PCA of CPM")[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA, Name="PCA of CPM")[3:5],
ncol = 3),
ncol = 1)

pp.PCA(cbind(mat_pc$B1,
             mat_pc$B2,
             mat_pc$B3,
             mat_pc$B4,
             mat_pc$B5
             ),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of CPM")

pp.PCA(cbind(log2(mat_pc$B1+1),
             log2(mat_pc$B2+1),
             log2(mat_pc$B3+1),
             log2(mat_pc$B4+1),
             log2(mat_pc$B5+1)
             ),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM+1)")

lapply(mat_pc, colnames)[[1]]
##  [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
##  [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=lapply(mat_pc, colnames)[[1]],perl = T, fixed = F)
##  [1] "B1.C" "B1.C" "B1.C" "B1.C" "B1.N" "B1.N" "B1.N" "B1.N" "B1.P" "B1.P"
## [11] "B1.P" "B1.P"

batch corrected

mat_pc.combine <- cbind(mat_pc$B1,
                        mat_pc$B2,
                        mat_pc$B3,
                        mat_pc$B4,
                        mat_pc$B5
                        )

cnt.df <- data.frame(row.names = colnames(mat_pc.combine),
                     batch = as.vector(unlist(sapply(colnames(mat_pc.combine), function(x){strsplit(x,split = "[.]")[[1]][1]}))),
                     condition1 = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat_pc.combine),perl = T, fixed = F))

cnt.df$condition2 <- cnt.df$condition1

cnt.df$condition2[grep("[.]C",cnt.df$condition2)] <- "Ctrl"
cnt.df
##        batch condition1 condition2
## B1.C.1    B1       B1.C       Ctrl
## B1.C.2    B1       B1.C       Ctrl
## B1.C.3    B1       B1.C       Ctrl
## B1.C.4    B1       B1.C       Ctrl
## B1.N.1    B1       B1.N       B1.N
## B1.N.2    B1       B1.N       B1.N
## B1.N.3    B1       B1.N       B1.N
## B1.N.4    B1       B1.N       B1.N
## B1.P.1    B1       B1.P       B1.P
## B1.P.2    B1       B1.P       B1.P
## B1.P.3    B1       B1.P       B1.P
## B1.P.4    B1       B1.P       B1.P
## B2.C.1    B2       B2.C       Ctrl
## B2.C.2    B2       B2.C       Ctrl
## B2.C.3    B2       B2.C       Ctrl
## B2.C.4    B2       B2.C       Ctrl
## B2.N.1    B2       B2.N       B2.N
## B2.N.2    B2       B2.N       B2.N
## B2.N.3    B2       B2.N       B2.N
## B2.N.4    B2       B2.N       B2.N
## B2.P.1    B2       B2.P       B2.P
## B2.P.2    B2       B2.P       B2.P
## B2.P.3    B2       B2.P       B2.P
## B2.P.4    B2       B2.P       B2.P
## B3.C.1    B3       B3.C       Ctrl
## B3.C.2    B3       B3.C       Ctrl
## B3.C.3    B3       B3.C       Ctrl
## B3.N.1    B3       B3.N       B3.N
## B3.N.2    B3       B3.N       B3.N
## B3.N.3    B3       B3.N       B3.N
## B3.P.1    B3       B3.P       B3.P
## B3.P.2    B3       B3.P       B3.P
## B3.P.3    B3       B3.P       B3.P
## B4.C.1    B4       B4.C       Ctrl
## B4.C.2    B4       B4.C       Ctrl
## B4.C.3    B4       B4.C       Ctrl
## B4.C.4    B4       B4.C       Ctrl
## B4.N.1    B4       B4.N       B4.N
## B4.N.2    B4       B4.N       B4.N
## B4.N.3    B4       B4.N       B4.N
## B4.P.1    B4       B4.P       B4.P
## B4.P.2    B4       B4.P       B4.P
## B4.P.3    B4       B4.P       B4.P
## B5.C.1    B5       B5.C       Ctrl
## B5.C.2    B5       B5.C       Ctrl
## B5.C.3    B5       B5.C       Ctrl
## B5.C.4    B5       B5.C       Ctrl
## B5.N.1    B5       B5.N       B5.N
## B5.N.2    B5       B5.N       B5.N
## B5.N.3    B5       B5.N       B5.N
## B5.N.4    B5       B5.N       B5.N
## B5.P.1    B5       B5.P       B5.P
## B5.P.2    B5       B5.P       B5.P
## B5.P.3    B5       B5.P       B5.P
## B5.P.4    B5       B5.P       B5.P
mat_pc.combine.crt <- sva::ComBat_seq(counts = as.matrix(mat_pc.combine),
                               batch = cnt.df$batch,
                               group = cnt.df$condition2)
## Found 5 batches
## Using full model in ComBat-seq.
## Adjusting for 10 covariate(s) or covariate level(s)
## Estimating dispersions
## Fitting the GLM model
## Shrinkage off - using GLM estimates for parameters
## Adjusting the data
pp.PCA(mat_pc.combine.crt,Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of CPM.crt")

pp.PCA(log2(mat_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM.crt+1)")

test TPM

load_matt <- function(dat){
  matt_raw <- read.table(dat, header = TRUE, stringsAsFactors = F, sep = "\t")
  rownames(matt_raw) <- matt_raw$gene_id
  matt_raw <- matt_raw[,2:ncol(matt_raw)]
  matt_raw <- as.matrix(matt_raw)
  #matt_raw <- edgeR::cpm(matt_raw)
  #matt_raw <- round(matt_raw)
  list_pc <- 'I:/Shared_win/genomics/mouse/GRCm38_vM25/gtf_detail/list_pc.lv1_2'
  id_pc <- as.vector(unlist(read.table(list_pc)))
  matt_pc <- matt_raw[id_pc,]
  return(matt_pc)
}
matt_pc <- list(B1=load_matt("../output_new/AD/RNAseq.20230530_SS2_LYN.tpm.gene.matrix"),
                B2=load_matt("../output_new/GBM/RNAseq.LYN.20221212.tpm.gene.matrix"),
                B3=load_matt("../output_new/EAE/RNAseq.LYN.20230106.tpm.gene.matrix"),
                B4=load_matt("../output_new/cuprizone/RNAseq.LYN.20221010.Spp1.tpm.gene.matrix"),
                B5=load_matt("../output_new/SIMPLE/RNAseq.LYN.20230219.tpm.gene.matrix"))
lapply(matt_pc, dim)
## $B1
## [1] 21649    12
## 
## $B2
## [1] 21649    12
## 
## $B3
## [1] 21649     9
## 
## $B4
## [1] 21649    10
## 
## $B5
## [1] 21649    12
lapply(matt_pc, colnames)
## $B1
##  [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
##  [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
## 
## $B2
##  [1] "Ctrl_1"       "Ctrl_2"       "Ctrl_3"       "Ctrl_4"       "GBM_1_Spp1_N"
##  [6] "GBM_1_Spp1_P" "GBM_2_Spp1_N" "GBM_2_Spp1_P" "GBM_3_Spp1_N" "GBM_3_Spp1_P"
## [11] "GBM_4_Spp1_N" "GBM_4_Spp1_P"
## 
## $B3
## [1] "Ctrl_1"      "Ctrl_2"      "Ctrl_3"      "EAE_1_GFP_N" "EAE_1_GFP_P"
## [6] "EAE_2_GFP_N" "EAE_2_GFP_P" "EAE_3_GFP_N" "EAE_3_GFP_P"
## 
## $B4
##  [1] "Ctrl_1"      "Ctrl_2"      "Ctrl_3"      "Ctrl_4"      "CZ_1_Spp1_N"
##  [6] "CZ_1_Spp1_P" "CZ_2_Spp1_N" "CZ_2_Spp1_P" "CZ_3_Spp1_N" "CZ_3_Spp1_P"
## 
## $B5
##  [1] "C.1.GFP.N" "C.2.GFP.N" "C.3.GFP.N" "C.4.GFP.N" "S.1.GFP.N" "S.1.GFP.P"
##  [7] "S.2.GFP.N" "S.2.GFP.P" "S.3.GFP.N" "S.3.GFP.P" "S.4.GFP.N" "S.4.GFP.P"
# reorder as C:N:P
#matt_pc$B1 <- matt_pc$B1[,c(grep("Ctrl",colnames(matt_pc$B1)),
#                          grep("Spp1_N",colnames(matt_pc$B1)),
#                          grep("Spp1_P",colnames(matt_pc$B1)),)]

matt_pc <- lapply(matt_pc, function(matt){
  
  matt[,c(grep("^Ctrl|^C...GFP.N|B1.C",colnames(matt),perl = T),
         grep("_N$|^S...GFP.N|B1.N",colnames(matt),perl = T),
         grep("_P$|^S...GFP.P|B1.P",colnames(matt),perl = T))]
})
lapply(matt_pc, dim)
## $B1
## [1] 21649    12
## 
## $B2
## [1] 21649    12
## 
## $B3
## [1] 21649     9
## 
## $B4
## [1] 21649    10
## 
## $B5
## [1] 21649    12
lapply(matt_pc, colnames)
## $B1
##  [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
##  [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
## 
## $B2
##  [1] "Ctrl_1"       "Ctrl_2"       "Ctrl_3"       "Ctrl_4"       "GBM_1_Spp1_N"
##  [6] "GBM_2_Spp1_N" "GBM_3_Spp1_N" "GBM_4_Spp1_N" "GBM_1_Spp1_P" "GBM_2_Spp1_P"
## [11] "GBM_3_Spp1_P" "GBM_4_Spp1_P"
## 
## $B3
## [1] "Ctrl_1"      "Ctrl_2"      "Ctrl_3"      "EAE_1_GFP_N" "EAE_2_GFP_N"
## [6] "EAE_3_GFP_N" "EAE_1_GFP_P" "EAE_2_GFP_P" "EAE_3_GFP_P"
## 
## $B4
##  [1] "Ctrl_1"      "Ctrl_2"      "Ctrl_3"      "Ctrl_4"      "CZ_1_Spp1_N"
##  [6] "CZ_2_Spp1_N" "CZ_3_Spp1_N" "CZ_1_Spp1_P" "CZ_2_Spp1_P" "CZ_3_Spp1_P"
## 
## $B5
##  [1] "C.1.GFP.N" "C.2.GFP.N" "C.3.GFP.N" "C.4.GFP.N" "S.1.GFP.N" "S.2.GFP.N"
##  [7] "S.3.GFP.N" "S.4.GFP.N" "S.1.GFP.P" "S.2.GFP.P" "S.3.GFP.P" "S.4.GFP.P"
names(matt_pc)
## [1] "B1" "B2" "B3" "B4" "B5"
# rename
for(NN in names(matt_pc)){
  
  colnames(matt_pc[[NN]]) <- c(paste0(paste0(NN,".C."),1:length(grep("^Ctrl|^C...GFP.N|B1.C",colnames(matt_pc[[NN]]),perl = T))),
                              paste0(paste0(NN,".N."),1:length(grep("_N$|^S...GFP.N|B1.N",colnames(matt_pc[[NN]]),perl = T))),
                              paste0(paste0(NN,".P."),1:length(grep("_P$|^S...GFP.P|B1.P",colnames(matt_pc[[NN]]),perl = T))))

}
lapply(matt_pc, colnames)
## $B1
##  [1] "B1.C.1" "B1.C.2" "B1.C.3" "B1.C.4" "B1.N.1" "B1.N.2" "B1.N.3" "B1.N.4"
##  [9] "B1.P.1" "B1.P.2" "B1.P.3" "B1.P.4"
## 
## $B2
##  [1] "B2.C.1" "B2.C.2" "B2.C.3" "B2.C.4" "B2.N.1" "B2.N.2" "B2.N.3" "B2.N.4"
##  [9] "B2.P.1" "B2.P.2" "B2.P.3" "B2.P.4"
## 
## $B3
## [1] "B3.C.1" "B3.C.2" "B3.C.3" "B3.N.1" "B3.N.2" "B3.N.3" "B3.P.1" "B3.P.2"
## [9] "B3.P.3"
## 
## $B4
##  [1] "B4.C.1" "B4.C.2" "B4.C.3" "B4.C.4" "B4.N.1" "B4.N.2" "B4.N.3" "B4.P.1"
##  [9] "B4.P.2" "B4.P.3"
## 
## $B5
##  [1] "B5.C.1" "B5.C.2" "B5.C.3" "B5.C.4" "B5.N.1" "B5.N.2" "B5.N.3" "B5.N.4"
##  [9] "B5.P.1" "B5.P.2" "B5.P.3" "B5.P.4"
cowplot::plot_grid(
plotlist = lapply(matt_pc,function(matt){
  
  ggplot(reshape2::melt(data.frame(matt)), aes(x=variable, y=log2(value+1))) + geom_point() +
    geom_boxplot() +
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(title = "data distribution - TPM", x="")
  
  
  
}), ncol = 2)

cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA, Name="PCA of TPM"),
ncol = 2)

pp.PCA(cbind(matt_pc$B1,
             matt_pc$B2,
             matt_pc$B3,
             matt_pc$B4,
             matt_pc$B5
             ),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of TPM")

pp.PCA(cbind(log2(matt_pc$B1+1),
             log2(matt_pc$B2+1),
             log2(matt_pc$B3+1),
             log2(matt_pc$B4+1),
             log2(matt_pc$B5+1)
             ),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM+1)")

# check X/Y-link genes (only Y- remain in protein-coding list)
pheatmap::pheatmap(cbind(log2(matt_pc$B1+1),
             log2(matt_pc$B2+1),
             log2(matt_pc$B3+1),
             log2(matt_pc$B4+1),
             log2(matt_pc$B5+1)
             )[c("Actb","Uty","Ddx3y","Eif2s3y","Kdm5d"),],
             gaps_col = c(4,8,12,16,20,24,27,30,33,37,40,43,47,51),
             cluster_cols = F, cluster_rows = F)

seems like X-link genes not in protein coding list, anyway Y- seems enough here

#c("Xist","Tsix") %in% id_pc
matt_pc.combine <- cbind(matt_pc$B1,
                         matt_pc$B2,
                         matt_pc$B3,
                         matt_pc$B4,
                         matt_pc$B5
                        )

cnt.df <- data.frame(row.names = colnames(matt_pc.combine),
                     batch = as.vector(unlist(sapply(colnames(matt_pc.combine), function(x){strsplit(x,split = "[.]")[[1]][1]}))),
                     condition1 = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(matt_pc.combine),perl = T, fixed = F))

cnt.df$condition2 <- cnt.df$condition1

cnt.df$condition2[grep("[.]C",cnt.df$condition2)] <- "Ctrl"
cnt.df
##        batch condition1 condition2
## B1.C.1    B1       B1.C       Ctrl
## B1.C.2    B1       B1.C       Ctrl
## B1.C.3    B1       B1.C       Ctrl
## B1.C.4    B1       B1.C       Ctrl
## B1.N.1    B1       B1.N       B1.N
## B1.N.2    B1       B1.N       B1.N
## B1.N.3    B1       B1.N       B1.N
## B1.N.4    B1       B1.N       B1.N
## B1.P.1    B1       B1.P       B1.P
## B1.P.2    B1       B1.P       B1.P
## B1.P.3    B1       B1.P       B1.P
## B1.P.4    B1       B1.P       B1.P
## B2.C.1    B2       B2.C       Ctrl
## B2.C.2    B2       B2.C       Ctrl
## B2.C.3    B2       B2.C       Ctrl
## B2.C.4    B2       B2.C       Ctrl
## B2.N.1    B2       B2.N       B2.N
## B2.N.2    B2       B2.N       B2.N
## B2.N.3    B2       B2.N       B2.N
## B2.N.4    B2       B2.N       B2.N
## B2.P.1    B2       B2.P       B2.P
## B2.P.2    B2       B2.P       B2.P
## B2.P.3    B2       B2.P       B2.P
## B2.P.4    B2       B2.P       B2.P
## B3.C.1    B3       B3.C       Ctrl
## B3.C.2    B3       B3.C       Ctrl
## B3.C.3    B3       B3.C       Ctrl
## B3.N.1    B3       B3.N       B3.N
## B3.N.2    B3       B3.N       B3.N
## B3.N.3    B3       B3.N       B3.N
## B3.P.1    B3       B3.P       B3.P
## B3.P.2    B3       B3.P       B3.P
## B3.P.3    B3       B3.P       B3.P
## B4.C.1    B4       B4.C       Ctrl
## B4.C.2    B4       B4.C       Ctrl
## B4.C.3    B4       B4.C       Ctrl
## B4.C.4    B4       B4.C       Ctrl
## B4.N.1    B4       B4.N       B4.N
## B4.N.2    B4       B4.N       B4.N
## B4.N.3    B4       B4.N       B4.N
## B4.P.1    B4       B4.P       B4.P
## B4.P.2    B4       B4.P       B4.P
## B4.P.3    B4       B4.P       B4.P
## B5.C.1    B5       B5.C       Ctrl
## B5.C.2    B5       B5.C       Ctrl
## B5.C.3    B5       B5.C       Ctrl
## B5.C.4    B5       B5.C       Ctrl
## B5.N.1    B5       B5.N       B5.N
## B5.N.2    B5       B5.N       B5.N
## B5.N.3    B5       B5.N       B5.N
## B5.N.4    B5       B5.N       B5.N
## B5.P.1    B5       B5.P       B5.P
## B5.P.2    B5       B5.P       B5.P
## B5.P.3    B5       B5.P       B5.P
## B5.P.4    B5       B5.P       B5.P
matt_pc.combine.crt <- sva::ComBat_seq(counts = as.matrix(matt_pc.combine),
                               batch = cnt.df$batch,
                               group = cnt.df$condition2)
## Found 5 batches
## Using full model in ComBat-seq.
## Adjusting for 10 covariate(s) or covariate level(s)
## Estimating dispersions
## Fitting the GLM model
## Shrinkage off - using GLM estimates for parameters
## Adjusting the data
pp.PCA(matt_pc.combine.crt,Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of TPM.crt")

pp.PCA(log2(matt_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM.crt+1)")

using log2(correctedCPM/TPM + 1) for downstream modeling

test the differential result for each batch in individual script

WGCNA

head(matt_pc.combine.crt)
##               B1.C.1 B1.C.2 B1.C.3 B1.C.4 B1.N.1 B1.N.2 B1.N.3 B1.N.4 B1.P.1
## 0610009B22Rik     98     41     92     87     35     55     39     45     30
## 0610010F05Rik      3      0      7      4      4      3      4      3      2
## 0610010K14Rik    173    180    157    138     80    100     86    108     48
## 0610012G03Rik     56     41     31     51     47     52     45     50     24
## 0610030E20Rik     36     48     42     45     27     43     43     44     36
## 0610040J01Rik    285    235    217    279    142    120    119    160     70
##               B1.P.2 B1.P.3 B1.P.4 B2.C.1 B2.C.2 B2.C.3 B2.C.4 B2.N.1 B2.N.2
## 0610009B22Rik     42     31     39     83     98     64     71     32     40
## 0610010F05Rik      0      2      4      2      7      4      5      5      2
## 0610010K14Rik     40     48     50    181    130    173    157    124    108
## 0610012G03Rik     40     45     45     42     39     53     45     33     41
## 0610030E20Rik     26     28     44     37     45     47     44     30     25
## 0610040J01Rik     62     74     68    287    247    236    240     64     91
##               B2.N.3 B2.N.4 B2.P.1 B2.P.2 B2.P.3 B2.P.4 B3.C.1 B3.C.2 B3.C.3
## 0610009B22Rik     37     51     45     36     43     46     76     78     87
## 0610010F05Rik      3      5      5      4      6      4      4      4      5
## 0610010K14Rik    100    115    104     87    102    102    133    168    189
## 0610012G03Rik     37     39     49     43     46     47     48     38     50
## 0610030E20Rik     29     25     19     16     17     14     37     47     47
## 0610040J01Rik     50     57     48     29     19     27    270    247    253
##               B3.N.1 B3.N.2 B3.N.3 B3.P.1 B3.P.2 B3.P.3 B4.C.1 B4.C.2 B4.C.3
## 0610009B22Rik  62.00     59     55     58     58     49     72     69  97.00
## 0610010F05Rik   0.81      4      4      3      3      4      4      4   0.52
## 0610010K14Rik 103.00    104     86     88     74     82    158    191 149.00
## 0610012G03Rik  60.00     49     54     55     54     50     39     50  44.00
## 0610030E20Rik  27.00     49     45     31     40     33     47     51  30.00
## 0610040J01Rik  93.00    120     87     64     68     55    260    251 259.00
##               B4.C.4 B4.N.1 B4.N.2 B4.N.3 B4.P.1 B4.P.2 B4.P.3 B5.C.1 B5.C.2
## 0610009B22Rik     85     89    105    117     65     68  34.00     68     55
## 0610010F05Rik      6      2      2      2      2      2   0.52      4      6
## 0610010K14Rik    159    166    141    172    101    120 104.00    171    127
## 0610012G03Rik     48     33     51     59     36     54  66.00     39     39
## 0610030E20Rik     43     45     32     51     35     27  29.00     29     38
## 0610040J01Rik    260    207    196    205    102     97  83.00    225    212
##               B5.C.3 B5.C.4 B5.N.1 B5.N.2 B5.N.3 B5.N.4 B5.P.1 B5.P.2 B5.P.3
## 0610009B22Rik     76     66     63     68     51     81     45     48     40
## 0610010F05Rik      3      2      4      3      3      2      3      3      2
## 0610010K14Rik    125    119    117    170    121     98     85    110     62
## 0610012G03Rik     34     38     38     28     38     45     53     45     44
## 0610030E20Rik     43     35     33     49     39     37     23     29     22
## 0610040J01Rik    234    183    239    189    176    213    103     81     73
##               B5.P.4
## 0610009B22Rik     31
## 0610010F05Rik      3
## 0610010K14Rik     72
## 0610012G03Rik     26
## 0610030E20Rik     37
## 0610040J01Rik    160

PCA plot

save pdf

library(ellipse)
## Warning: package 'ellipse' was built under R version 4.0.5
## 
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
## 
##     pairs
pp.PCA.m <- function(mat,Name="PCA", Color=c("#006BD7","#D7770D","#FF4848"),Size=4.5, Alpha=0.75, Font=2, ellipse=TRUE, label=TRUE, Condition=NULL){
  rv <- rowVars(mat)
  selt <- order(rv, decreasing = TRUE)[seq_len(2000)]
  pca2 <- stats::prcomp(t(mat[selt,]), scale.=TRUE, center=TRUE)
  
  pca_d <- as.data.frame(pca2$x)
  
  pca_d[,"condition"] = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat),perl = T, fixed = F)
  pca_d[,"batch"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".")[[1]][1]})))
  pca_d[,"replicate"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".")[[1]][3]})))
  
  pca_d$condition2 = pca_d$condition
  
  if(!is.null(Condition)){
    pca_d$condition2 = Condition
  }
  
  centroids <- aggregate(cbind(PC1,PC2)~condition2, pca_d, mean)
  rownames(centroids) <- centroids$condition2
  conf.rgn <- do.call(rbind, lapply(unique(pca_d$condition2),function(t)
    data.frame(condition=as.character(t),
               ellipse::ellipse(cov(pca_d[pca_d$condition2==t,1:2]),
                                centre=as.matrix(centroids[t,2:3]),
                                level=0.95),
               stringsAsFactors = FALSE)))
  
  
  xxx <- ggplot(data=pca_d, aes(x=PC1, y=PC2, color=condition)) +
    geom_point(size=Size,alpha=Alpha)  +
  #stat_ellipse(level = 0.85,show.legend = F) +
  #ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F) + 
    labs(title = Name) +
    scale_colour_manual(values = Color) + guides(color=guide_legend(reverse = F)) + 
    theme_classic()

  if(label==TRUE){
    xxx <- xxx+ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F, max.overlaps = 50)
  }
  
  if(ellipse==TRUE){
    xxx <- xxx+geom_path(data=conf.rgn, show.legend = F)
  }
  xxx
}
cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA.m, Name="PCA of CPM", ellipse=T, label=F)[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA.m, Name="PCA of CPM", ellipse=T, label=F)[3:5],
ncol = 3),
ncol = 1)

cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA.m, Name="PCA of CPM", ellipse=F, label=T)[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(mat_pc, pp.PCA.m, Name="PCA of CPM", ellipse=F, label=T)[3:5],
ncol = 3),
ncol = 1)

cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA.m, Name="PCA of TPM", ellipse=T, label=F)[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA.m, Name="PCA of TPM", ellipse=T, label=F)[3:5],
ncol = 3),
ncol = 1)

cowplot::plot_grid(
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA.m, Name="PCA of TPM", ellipse=F, label=T)[1:2],
ncol = 3),
cowplot::plot_grid(
plotlist = lapply(matt_pc, pp.PCA.m, Name="PCA of TPM", ellipse=F, label=T)[3:5],
ncol = 3),
ncol = 1)

pp.PCA.m(log2(matt_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM.crt+1)", ellipse = F, label = T)

pp.PCA.m(log2(mat_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM.crt+1)", ellipse = F, label = T)

library(ellipse)
pp.PCAn <- function(mat,Name="PCA", Color=c("#006BD7","#D7770D","#FF4848"),Size=4.5, Alpha=0.75, Font=2, 
                    ellipse=TRUE, label=TRUE, Condition=NULL){
  rv <- rowVars(mat)
  selt <- order(rv, decreasing = TRUE)[seq_len(2000)]
  pca2 <- stats::prcomp(t(mat[selt,]), scale.=TRUE, center=TRUE)
  
  pca_d <- as.data.frame(pca2$x)
  
  pca_d[,"condition"] = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat),perl = T, fixed = F)
  pca_d[,"batch"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".",fixed = T)[[1]][1]})))
  pca_d[,"replicate"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".",fixed = T)[[1]][3]})))
  
  pca_d$condition2 = pca_d$condition
  
  if(!is.null(Condition)){
    pca_d$condition2 = Condition
  }
  
  centroids <- aggregate(cbind(PC1,PC2)~condition2, pca_d, mean)
  rownames(centroids) <- centroids$condition2
  conf.rgn <- do.call(rbind, lapply(unique(pca_d$condition2),function(t)
    data.frame(condition=as.character(t),
               ellipse::ellipse(cov(pca_d[pca_d$condition2==t,1:2]),
                                centre=as.matrix(centroids[t,2:3]),
                                level=0.95),
               stringsAsFactors = FALSE)))
  
  
  xxx <- ggplot(data=pca_d, aes(x=PC1, y=PC2, color=condition)) +
    geom_point(aes(shape=batch),size=Size,alpha=Alpha, show.legend = T)  +
  #stat_ellipse(level = 0.85,show.legend = F) +
  #ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F) + 
    labs(title = Name, x="PC1", y="PC2") +
    scale_colour_manual(values = c(rep(Color,4),Color[1])) + guides(color=guide_legend(reverse = F)) + 
    guides(color=FALSE) +
    scale_shape_manual(values = c(0,2,5,1,6)) +
    theme_classic()

  if(label==TRUE){
    xxx <- xxx+ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F, max.overlaps = 50)
  }
  
  if(ellipse==TRUE){
    xxx <- xxx+geom_path(data=conf.rgn, show.legend = F)
  }
  xxx
}
pp.PCAn(log2(matt_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM.crt+1)", ellipse = T, label = F, Condition = cnt.df$condition2)

pp.PCAn(log2(mat_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM.crt+1)", ellipse = T, label = F, Condition = cnt.df$condition2)

library(ellipse)
pp.PCAn.test <- function(mat,Name="PCA", Color=c("#006BD7","#D7770D","#FF4848"),Size=4.5, Alpha=0.75, Font=2, 
                    ellipse=TRUE, label=TRUE, Condition=NULL,
                    PCx="PC3",PCy="PC4"){
  rv <- rowVars(mat)
  selt <- order(rv, decreasing = TRUE)[seq_len(2000)]
  pca2 <- stats::prcomp(t(mat[selt,]), scale.=TRUE, center=TRUE)
  
  pca_d <- as.data.frame(pca2$x)
  
  pca_d[,"condition"] = gsub("[.]1|[.]2|[.]3|[.]4|[.]5","",x=colnames(mat),perl = T, fixed = F)
  pca_d[,"batch"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".",fixed = T)[[1]][1]})))
  pca_d[,"replicate"] = as.vector(unlist(sapply(colnames(mat), function(x){strsplit(x,split = ".",fixed = T)[[1]][3]})))
  
  pca_d$condition2 = pca_d$condition
  
  if(!is.null(Condition)){
    pca_d$condition2 = Condition
  }
  
  centroids <- aggregate(cbind(get(PCx),get(PCy))~condition2, pca_d, mean)
  rownames(centroids) <- centroids$condition2
  conf.rgn <- do.call(rbind, lapply(unique(pca_d$condition2),function(t)
    data.frame(condition=as.character(t),
               ellipse::ellipse(cov(pca_d[pca_d$condition2==t,3:4]),
                                centre=as.matrix(centroids[t,2:3]),
                                level=0.95),
               stringsAsFactors = FALSE)))
  
  
  xxx <- ggplot(data=pca_d, aes(x=get(PCx), y=get(PCy), color=condition)) +
    geom_point(aes(shape=batch),size=Size,alpha=Alpha, show.legend = T)  +
  #stat_ellipse(level = 0.85,show.legend = F) +
  #ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F) + 
    labs(title = Name, x=PCx, y=PCy) +
    scale_colour_manual(values = c(rep(Color,4),Color[1])) + guides(color=guide_legend(reverse = F)) + 
    guides(color=FALSE) +
    scale_shape_manual(values = c(0,2,5,1,6)) +
    theme_classic()

  if(label==TRUE){
    xxx <- xxx+ggrepel::geom_text_repel(mapping = aes(label=colnames(mat)),size=Font, show.legend = F, max.overlaps = 50)
  }
  
  if(ellipse==TRUE){
    xxx <- xxx+geom_path(data=conf.rgn, show.legend = F)
  }
  xxx
}
pp.PCAn.test(log2(matt_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(TPM.crt+1)", ellipse = T, label = F, Condition = cnt.df$condition2,
        PCx = "PC3", PCy = "PC4")

pp.PCAn.test(log2(mat_pc.combine.crt+1),Color = rep(c("#006BD7","#D7770D","#FF4848"),5), Name = "PCA of log2(CPM.crt+1)", ellipse = T, label = F, Condition = cnt.df$condition2,
        PCx = "PC3", PCy = "PC4")

test

cnt.df
##        batch condition1 condition2
## B1.C.1    B1       B1.C       Ctrl
## B1.C.2    B1       B1.C       Ctrl
## B1.C.3    B1       B1.C       Ctrl
## B1.C.4    B1       B1.C       Ctrl
## B1.N.1    B1       B1.N       B1.N
## B1.N.2    B1       B1.N       B1.N
## B1.N.3    B1       B1.N       B1.N
## B1.N.4    B1       B1.N       B1.N
## B1.P.1    B1       B1.P       B1.P
## B1.P.2    B1       B1.P       B1.P
## B1.P.3    B1       B1.P       B1.P
## B1.P.4    B1       B1.P       B1.P
## B2.C.1    B2       B2.C       Ctrl
## B2.C.2    B2       B2.C       Ctrl
## B2.C.3    B2       B2.C       Ctrl
## B2.C.4    B2       B2.C       Ctrl
## B2.N.1    B2       B2.N       B2.N
## B2.N.2    B2       B2.N       B2.N
## B2.N.3    B2       B2.N       B2.N
## B2.N.4    B2       B2.N       B2.N
## B2.P.1    B2       B2.P       B2.P
## B2.P.2    B2       B2.P       B2.P
## B2.P.3    B2       B2.P       B2.P
## B2.P.4    B2       B2.P       B2.P
## B3.C.1    B3       B3.C       Ctrl
## B3.C.2    B3       B3.C       Ctrl
## B3.C.3    B3       B3.C       Ctrl
## B3.N.1    B3       B3.N       B3.N
## B3.N.2    B3       B3.N       B3.N
## B3.N.3    B3       B3.N       B3.N
## B3.P.1    B3       B3.P       B3.P
## B3.P.2    B3       B3.P       B3.P
## B3.P.3    B3       B3.P       B3.P
## B4.C.1    B4       B4.C       Ctrl
## B4.C.2    B4       B4.C       Ctrl
## B4.C.3    B4       B4.C       Ctrl
## B4.C.4    B4       B4.C       Ctrl
## B4.N.1    B4       B4.N       B4.N
## B4.N.2    B4       B4.N       B4.N
## B4.N.3    B4       B4.N       B4.N
## B4.P.1    B4       B4.P       B4.P
## B4.P.2    B4       B4.P       B4.P
## B4.P.3    B4       B4.P       B4.P
## B5.C.1    B5       B5.C       Ctrl
## B5.C.2    B5       B5.C       Ctrl
## B5.C.3    B5       B5.C       Ctrl
## B5.C.4    B5       B5.C       Ctrl
## B5.N.1    B5       B5.N       B5.N
## B5.N.2    B5       B5.N       B5.N
## B5.N.3    B5       B5.N       B5.N
## B5.N.4    B5       B5.N       B5.N
## B5.P.1    B5       B5.P       B5.P
## B5.P.2    B5       B5.P       B5.P
## B5.P.3    B5       B5.P       B5.P
## B5.P.4    B5       B5.P       B5.P
#
selt.top <- order(rowVars(matt_pc.combine.crt), decreasing = TRUE)[seq_len(8000)]
mat.test <- matt_pc.combine.crt[selt.top,]

#
idx.filt <- list()
for(NN in unique(cnt.df$condition1)){
  idx.filt[[NN]] <- rowMeans(matt_pc.combine[rownames(mat.test),grep(NN,colnames(mat.test))]) > 15 &
                        rowSums(matt_pc.combine[rownames(mat.test),grep(NN,colnames(mat.test))]>15) >=3
}

#
idx.filt.0 <- Reduce(function(x,y){x|y},idx.filt)
mat.test <- mat.test[idx.filt.0,]

mat.test <- log2(mat.test +1)
dim(matt_pc.combine.crt)
## [1] 21649    55
dim(mat.test)
## [1] 7136   55
head(mat.test)
##            B1.C.1    B1.C.2    B1.C.3    B1.C.4     B1.N.1    B1.N.2    B1.N.3
## Iigp1    0.000000  1.000000  1.584963  1.000000  1.0000000  1.000000  1.584963
## Mgl2     2.321928  1.584963  2.321928  1.584963  0.8797058  1.000000  1.000000
## Apoe    10.984418 11.380461 11.222191 11.217958 14.8830728 14.914245 14.683653
## Fam177a  0.000000  0.000000  2.321928  1.584963  2.0000000  1.584963  2.000000
## Cst3    16.189245 16.048487 16.098546 16.138392 16.0449277 16.156992 16.204839
## Ms4a7    1.000000  1.000000  1.584963  1.584963  1.0000000  1.000000  1.000000
##            B1.N.4   B1.P.1     B1.P.2    B1.P.3    B1.P.4    B2.C.1    B2.C.2
## Iigp1    1.000000  1.00000  1.0000000  1.584963  1.000000  0.000000  1.584963
## Mgl2     1.584963  0.00000  0.3561438  0.704872  1.584963  2.807355  1.584963
## Apoe    14.526744 16.39126 16.1803171 16.033767 16.186037 11.594325 11.271463
## Fam177a  0.000000  2.00000  1.0000000  2.000000  1.584963  0.000000  0.000000
## Cst3    16.177400 15.51348 15.5744457 15.708922 15.642390 16.135709 16.080547
## Ms4a7    1.000000  1.00000  1.0000000  1.000000  1.000000  1.584963  1.000000
##             B2.C.3     B2.C.4    B2.N.1    B2.N.2    B2.N.3    B2.N.4    B2.P.1
## Iigp1    0.6690268  1.0000000  6.000000  5.426265  5.930737  5.554589  6.044394
## Mgl2     2.0000000  1.0000000  1.000000  2.321928  3.000000  3.000000  1.000000
## Apoe    10.7648716 11.0279060 13.898885 13.934981 13.854284 14.612062 15.068400
## Fam177a  0.0000000  2.5849625  2.321928  2.584963  3.321928  3.000000  2.321928
## Cst3    16.1507389 16.0815042 14.085555 14.799029 13.821874 14.184720 12.588480
## Ms4a7    0.1634987  0.7311832  4.954196  4.754888  5.832890  5.882643  5.832890
##            B2.P.2    B2.P.3    B2.P.4   B3.C.1   B3.C.2   B3.C.3   B3.N.1
## Iigp1    5.807355  5.882643  5.857981  0.00000  0.00000  0.00000 18.18734
## Mgl2     2.807355  3.169925  3.000000  0.00000  0.00000  0.00000 16.73730
## Apoe    15.097497 14.907642 15.156202 11.05664 11.21614 11.38802 14.74041
## Fam177a  3.000000  2.807355  2.321928  0.00000  0.00000  0.00000 15.04345
## Cst3    12.600378 12.235715 12.511011 16.10206 16.12763 16.17828 15.11545
## Ms4a7    7.118941  7.076816  6.954196  0.00000  0.00000  0.00000 13.87508
##           B3.N.2   B3.N.3   B3.P.1     B3.P.2   B3.P.3    B4.C.1    B4.C.2
## Iigp1   18.99766 18.66766 18.43587 18.7893108 18.74362  1.000000  1.000000
## Mgl2    16.74264 16.91777  0.00000  0.8718436  0.00000  2.321928  2.321928
## Apoe    14.00922 14.62331 15.37785 15.3597496 15.44090 10.884934 11.038919
## Fam177a 16.28247 16.17865 15.33071 16.5238529 16.42813  2.000000  1.000000
## Cst3    14.86747 14.83047 14.24414 14.1350679 14.08713 16.172799 16.194488
## Ms4a7   13.88169 14.12283 16.40632 16.4740069 16.40537  1.000000  1.000000
##            B4.C.3     B4.C.4   B4.N.1   B4.N.2     B4.N.3    B4.P.1    B4.P.2
## Iigp1    1.584963  0.2016339  0.00000  1.00000  0.4646683  1.584963  1.000000
## Mgl2     2.584963  1.0000000  2.00000  1.00000  1.0000000  1.000000  1.000000
## Apoe    11.354249 11.5670054 11.87460 12.20212 11.5338164 16.007838 15.865854
## Fam177a  1.584963  0.0000000  0.00000  2.00000  1.5849625  1.000000  2.321928
## Cst3    16.181463 16.0112709 16.02447 16.01072 16.0185045 15.352906 15.283342
## Ms4a7    1.584963  1.0000000  1.00000  1.00000  2.0000000  1.584963  1.000000
##             B4.P.3     B5.C.1     B5.C.2     B5.C.3     B5.C.4    B5.N.1
## Iigp1    0.8237494  0.0000000  0.0000000  0.7990873  0.5945485  2.000000
## Mgl2     1.5849625  0.3895668  2.3219281  0.4646683  0.4329594  4.087463
## Apoe    15.9930047 10.6679985 11.0042205 10.8257538 11.2888661 13.142426
## Fam177a  1.0000000  0.0000000  0.0000000  2.0000000  2.0000000  1.000000
## Cst3    15.2409031 15.8412941 15.8410483 15.9093306 15.8975855 15.687813
## Ms4a7    1.0000000  0.0000000  0.7990873  0.0000000  0.0000000  0.000000
##             B5.N.2    B5.N.3     B5.N.4    B5.P.1    B5.P.2    B5.P.3    B5.P.4
## Iigp1    1.5849625  2.000000  0.9030383  3.321928  4.954196  4.807355  3.700440
## Mgl2     0.0000000  2.000000  2.0000000  3.169925  2.807355  2.807355  5.083213
## Apoe    13.4614794 12.301210 11.5588990 16.204972 16.274342 15.423871 15.243620
## Fam177a  1.0000000  1.584963  1.5849625  1.584963  1.584963  1.584963  1.000000
## Cst3    15.8270697 15.840680 15.8593164 15.053205 14.709299 14.880253 15.237359
## Ms4a7    0.4436067  0.000000  0.0000000  3.169925  4.087463  3.459432  5.044394
check.outlier <- c("Ltf","S100a8","S100a9","Ngp","Camp","Trem3","Lcn2")
matt_pc.combine[c(check.outlier,"Lag3"),]
##        B1.C.1 B1.C.2 B1.C.3 B1.C.4 B1.N.1  B1.N.2 B1.N.3 B1.N.4 B1.P.1 B1.P.2
## Ltf      0.00   0.00      0   0.00   0.00    0.00    0.0    0.0   0.00   0.00
## S100a8   0.00   0.00      0   0.00   0.00    0.00    0.0    0.0   0.00   0.00
## S100a9   0.00   0.00      0   0.00   0.00    0.00    0.0    0.0   0.00   0.00
## Ngp      0.00   0.00      0   0.00   0.00    0.00    0.0    0.0   0.00   0.00
## Camp     0.00   0.00      0   0.00   0.00    0.00    0.0    0.0   0.00   0.00
## Trem3    4.84   0.73      0   0.00   2.11    0.50    0.0    0.0   0.00   0.00
## Lcn2     0.00   0.00      0   0.00   0.00    0.00    0.0    0.0   0.00   0.00
## Lag3   660.05 729.37    689 690.87 919.16 1075.36  984.6 1115.3 791.13 855.37
##        B1.P.3 B1.P.4 B2.C.1 B2.C.2 B2.C.3 B2.C.4 B2.N.1 B2.N.2 B2.N.3 B2.N.4
## Ltf      0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.76   0.15   0.00
## S100a8   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   1.32   0.00
## S100a9   0.00   0.00   0.00   1.76   0.00   0.00   0.00   0.00   1.21   0.00
## Ngp      0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00
## Camp     0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00
## Trem3    0.00   0.00   5.36   2.08   0.00   0.00  14.50   5.59  13.92  13.78
## Lcn2     0.00   0.00   1.51   0.00   0.00   0.00   8.05   0.48   3.73   0.58
## Lag3   908.82 902.91 545.32 553.36 567.03 513.83 460.77 517.29 181.96 325.38
##        B2.P.1 B2.P.2 B2.P.3 B2.P.4 B3.C.1 B3.C.2 B3.C.3  B3.N.1 B3.N.2  B3.N.3
## Ltf      0.00   0.00   0.83   0.00   0.00   0.00   0.00    0.00   0.00    0.00
## S100a8   0.00   2.08   5.40   0.00   0.00   0.00   0.00    0.00   0.00    0.00
## S100a9   0.00   0.95   0.00   0.00   0.00   0.00   0.00    0.83   0.00    0.00
## Ngp      0.00   0.00   0.00   0.00   0.00   0.00   0.00    0.00   0.00    0.00
## Camp     0.00   0.00   0.00   0.00   0.00   0.00   0.00    0.00   0.00    0.00
## Trem3    3.52   7.10  13.15   8.49   0.51   0.00   0.00    0.00   0.79    0.68
## Lcn2    12.84   0.42   5.47   3.93   0.00   0.00   0.00    0.00   0.00    0.77
## Lag3   648.39 318.97 206.74 176.39 484.00 454.17 475.36 1083.59 989.85 1143.17
##         B3.P.1  B3.P.2  B3.P.3 B4.C.1 B4.C.2 B4.C.3 B4.C.4 B4.N.1 B4.N.2 B4.N.3
## Ltf       0.00    0.00    0.00    0.0   0.00   0.00    0.0   0.00   0.00   0.00
## S100a8    0.00    1.25    1.23    0.0   0.00   0.00    0.0   0.00   0.00   0.00
## S100a9    0.00    0.00    0.00    0.0   0.00   0.00    0.0   0.00   0.00   0.00
## Ngp       0.00    0.00    0.00    0.0   0.00   0.00    0.0   0.00   0.00   0.00
## Camp      0.48    0.00    0.00    0.0   0.00   0.00    0.0   0.00   0.00   0.00
## Trem3     0.00    0.38    0.00    0.0   0.00   0.60    1.6   0.00   0.00   0.00
## Lcn2      0.37    0.85    0.86    0.0   0.72   0.00    0.0   0.00   0.00   0.00
## Lag3   1193.42 1221.94 1199.24  462.9 542.51 539.29  742.3 535.61 576.36 510.66
##        B4.P.1 B4.P.2 B4.P.3 B5.C.1 B5.C.2 B5.C.3 B5.C.4 B5.N.1 B5.N.2 B5.N.3
## Ltf      0.00   0.00   0.78   0.00   0.00   0.00   0.00   0.00   0.00   0.00
## S100a8   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   3.31   0.00
## S100a9   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00
## Ngp      0.00   0.00   0.34   0.00   0.00   0.00   0.38   0.00   0.40   0.00
## Camp     0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00
## Trem3    0.00   0.00   0.00   0.00   1.63   0.00   0.50   0.00   0.00   0.00
## Lcn2     0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00   0.00
## Lag3   696.48 774.17 702.64 465.72 445.28 449.94 491.93 806.46 744.45 616.73
##        B5.N.4  B5.P.1  B5.P.2 B5.P.3 B5.P.4
## Ltf      0.00    0.00    0.00   0.00   0.00
## S100a8   0.00    0.00    0.00   0.00   0.00
## S100a9   0.00    0.00    0.00   0.00   0.00
## Ngp      0.00    0.00    0.00   0.00   0.00
## Camp     0.00    0.00    0.00   0.00   0.00
## Trem3    1.53    0.00    0.00   0.43   0.00
## Lcn2     0.00    0.00    0.00   0.55   0.00
## Lag3   586.61 1086.39 1002.87 898.92 836.32
c(check.outlier,"Lag3") %in% rownames(mat.test)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE
par(mfrow=c(2,2))
plot(x=rowMeans(mat_pc.combine.crt),
     y=rowVars(mat_pc.combine.crt), log="xy", 
     xlim=c(1e-2,3e+04),  
     ylim=c(1e-2,1e+11) ,
     main="unfiltered (TPM.crt)", xlab="rowMeans",ylab="rowVariations")

plot(x=rowMeans(2^(mat.test+1)),
     y=rowVars(2^(mat.test+1)), log="xy", 
     xlim=c(1e-2,4e+04), 
     ylim=c(1e-2,1e+11) ,
     main="filtered (TPM.crt)", xlab="rowMeans",ylab="rowVariations")

plot(x=rowMeans(log2(mat_pc.combine.crt+1) ),
     y=rowVars(log2(mat_pc.combine.crt+1) ), log="xy", 
     xlim=c(0.02,15), ylim=c(0.01,50),
     main="unfiltered (log2(TPM.crt+1) )", xlab="rowMeans",ylab="rowVariations")

plot(x=rowMeans(mat.test),
     y=rowVars(mat.test), log="xy", 
     xlim=c(0.02,15), ylim=c(0.01,50) ,
     main="filtered (log2(TPM.crt+1) )", xlab="rowMeans",ylab="rowVariations")

pdf("./result.new/matrix_filtering.pdf",
    width = 7.5,height = 6)
par(mfrow=c(2,2))
plot(x=rowMeans(mat_pc.combine.crt),
     y=rowVars(mat_pc.combine.crt), log="xy", 
     xlim=c(1e-2,3e+04),  
     ylim=c(1e-2,1e+11) , pch=20, cex=0.4, col = rgb(1,2,1, 60, maxColorValue = 255),
     main="unfiltered (TPM.crt)", xlab="rowMeans",ylab="rowVariations")
## Warning in xy.coords(x, y, xlabel, ylabel, log): 4761 x values <= 0 omitted from
## logarithmic plot
## Warning in xy.coords(x, y, xlabel, ylabel, log): 4761 y values <= 0 omitted from
## logarithmic plot
plot(x=rowMeans(2^(mat.test+1)),
     y=rowVars(2^(mat.test+1)), log="xy", 
     xlim=c(1e-1,4e+04), 
     ylim=c(1e-2,1e+11) ,pch=20, cex=0.4, col = rgb(1,2,1, 60, maxColorValue = 255),
     main="filtered (TPM.crt)", xlab="rowMeans",ylab="rowVariations")

plot(x=rowMeans(log2(mat_pc.combine.crt+1) ),
     y=rowVars(log2(mat_pc.combine.crt+1) ), log="xy", 
     xlim=c(0.02,15), ylim=c(0.01,50),pch=20, cex=0.4, col = rgb(1,2,1, 60, maxColorValue = 255),
     main="unfiltered (log2(TPM.crt+1) )", xlab="rowMeans",ylab="rowVariations")
## Warning in xy.coords(x, y, xlabel, ylabel, log): 4761 x values <= 0 omitted from
## logarithmic plot

## Warning in xy.coords(x, y, xlabel, ylabel, log): 4761 y values <= 0 omitted from
## logarithmic plot
plot(x=rowMeans(mat.test),
     y=rowVars(mat.test), log="xy", 
     xlim=c(0.1,15), ylim=c(0.01,50) ,pch=20, cex=0.4, col = rgb(1,2,1, 60, maxColorValue = 255),
     main="filtered (log2(TPM.crt+1) )", xlab="rowMeans",ylab="rowVariations")
dev.off()
## png 
##   2
matt_pc.crt <- list(B1=matt_pc.combine.crt[,grep("B1",colnames(matt_pc.combine.crt))],
                    B2=matt_pc.combine.crt[,grep("B2",colnames(matt_pc.combine.crt))],
                    B3=matt_pc.combine.crt[,grep("B3",colnames(matt_pc.combine.crt))],
                    B4=matt_pc.combine.crt[,grep("B4",colnames(matt_pc.combine.crt))],
                    B5=matt_pc.combine.crt[,grep("B5",colnames(matt_pc.combine.crt))]
                    )
cowplot::plot_grid(
plotlist = lapply(matt_pc.crt,function(matt){
  
  ggplot(reshape2::melt(data.frame(matt)), aes(x=variable, y=log2(value+1))) + geom_point() +
    geom_boxplot() +
    stat_summary(fun=mean, geom="point", shape=18, size=3, color="red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1)) + labs(title = "data distribution - TPM.crt", x="")
  
  
  
}), ncol = 2)

run

par(cex=0.75, mar=c(0,5,2,0))
#topN <- 2000

plot(hclust(dist(t(mat.test )), method = "average"), 
#plot(hclust(dist(t(mat.test )), method = "ward.D2"), 
     main = paste0("Sample clustering to detect ourliers, filtered ",nrow(mat.test)," pcgenes, log2(TPM.crt+1)"),
     sub="", xlab = "", cex.lab= 1.5, cex.axis = 1.5, cex.main = 2)

#abline(h = 15, col="red")
datExpr <- t(mat.test)
sampleTree <- hclust(dist(datExpr), method = "average")
#sampleTree <- hclust(dist(datExpr), method = "ward.D2")
cnt.df
##        batch condition1 condition2
## B1.C.1    B1       B1.C       Ctrl
## B1.C.2    B1       B1.C       Ctrl
## B1.C.3    B1       B1.C       Ctrl
## B1.C.4    B1       B1.C       Ctrl
## B1.N.1    B1       B1.N       B1.N
## B1.N.2    B1       B1.N       B1.N
## B1.N.3    B1       B1.N       B1.N
## B1.N.4    B1       B1.N       B1.N
## B1.P.1    B1       B1.P       B1.P
## B1.P.2    B1       B1.P       B1.P
## B1.P.3    B1       B1.P       B1.P
## B1.P.4    B1       B1.P       B1.P
## B2.C.1    B2       B2.C       Ctrl
## B2.C.2    B2       B2.C       Ctrl
## B2.C.3    B2       B2.C       Ctrl
## B2.C.4    B2       B2.C       Ctrl
## B2.N.1    B2       B2.N       B2.N
## B2.N.2    B2       B2.N       B2.N
## B2.N.3    B2       B2.N       B2.N
## B2.N.4    B2       B2.N       B2.N
## B2.P.1    B2       B2.P       B2.P
## B2.P.2    B2       B2.P       B2.P
## B2.P.3    B2       B2.P       B2.P
## B2.P.4    B2       B2.P       B2.P
## B3.C.1    B3       B3.C       Ctrl
## B3.C.2    B3       B3.C       Ctrl
## B3.C.3    B3       B3.C       Ctrl
## B3.N.1    B3       B3.N       B3.N
## B3.N.2    B3       B3.N       B3.N
## B3.N.3    B3       B3.N       B3.N
## B3.P.1    B3       B3.P       B3.P
## B3.P.2    B3       B3.P       B3.P
## B3.P.3    B3       B3.P       B3.P
## B4.C.1    B4       B4.C       Ctrl
## B4.C.2    B4       B4.C       Ctrl
## B4.C.3    B4       B4.C       Ctrl
## B4.C.4    B4       B4.C       Ctrl
## B4.N.1    B4       B4.N       B4.N
## B4.N.2    B4       B4.N       B4.N
## B4.N.3    B4       B4.N       B4.N
## B4.P.1    B4       B4.P       B4.P
## B4.P.2    B4       B4.P       B4.P
## B4.P.3    B4       B4.P       B4.P
## B5.C.1    B5       B5.C       Ctrl
## B5.C.2    B5       B5.C       Ctrl
## B5.C.3    B5       B5.C       Ctrl
## B5.C.4    B5       B5.C       Ctrl
## B5.N.1    B5       B5.N       B5.N
## B5.N.2    B5       B5.N       B5.N
## B5.N.3    B5       B5.N       B5.N
## B5.N.4    B5       B5.N       B5.N
## B5.P.1    B5       B5.P       B5.P
## B5.P.2    B5       B5.P       B5.P
## B5.P.3    B5       B5.P       B5.P
## B5.P.4    B5       B5.P       B5.P
datTraits <- data.frame(row.names = rownames(cnt.df),
                        #Ctrl = grepl("Ctrl",cnt.df$condition2),
                        B1.C = grepl("B1.C",cnt.df$condition1),
                        B1.N = grepl("B1.N",cnt.df$condition1),
                        B1.P = grepl("B1.P",cnt.df$condition1),
                        B2.C = grepl("B2.C",cnt.df$condition1),
                        B2.N = grepl("B2.N",cnt.df$condition1),
                        B2.P = grepl("B2.P",cnt.df$condition1),
                        B3.C = grepl("B3.C",cnt.df$condition1),
                        B3.N = grepl("B3.N",cnt.df$condition1),
                        B3.P = grepl("B3.P",cnt.df$condition1),
                        B4.C = grepl("B4.C",cnt.df$condition1),
                        B4.N = grepl("B4.N",cnt.df$condition1),
                        B4.P = grepl("B4.P",cnt.df$condition1),
                        B5.C = grepl("B5.C",cnt.df$condition1),
                        B5.N = grepl("B5.N",cnt.df$condition1),
                        B5.P = grepl("B5.P",cnt.df$condition1))

datTraits[datTraits==TRUE] <- 1
datTraits[datTraits==FALSE] <- 0

datTraits
##        B1.C B1.N B1.P B2.C B2.N B2.P B3.C B3.N B3.P B4.C B4.N B4.P B5.C B5.N
## B1.C.1    1    0    0    0    0    0    0    0    0    0    0    0    0    0
## B1.C.2    1    0    0    0    0    0    0    0    0    0    0    0    0    0
## B1.C.3    1    0    0    0    0    0    0    0    0    0    0    0    0    0
## B1.C.4    1    0    0    0    0    0    0    0    0    0    0    0    0    0
## B1.N.1    0    1    0    0    0    0    0    0    0    0    0    0    0    0
## B1.N.2    0    1    0    0    0    0    0    0    0    0    0    0    0    0
## B1.N.3    0    1    0    0    0    0    0    0    0    0    0    0    0    0
## B1.N.4    0    1    0    0    0    0    0    0    0    0    0    0    0    0
## B1.P.1    0    0    1    0    0    0    0    0    0    0    0    0    0    0
## B1.P.2    0    0    1    0    0    0    0    0    0    0    0    0    0    0
## B1.P.3    0    0    1    0    0    0    0    0    0    0    0    0    0    0
## B1.P.4    0    0    1    0    0    0    0    0    0    0    0    0    0    0
## B2.C.1    0    0    0    1    0    0    0    0    0    0    0    0    0    0
## B2.C.2    0    0    0    1    0    0    0    0    0    0    0    0    0    0
## B2.C.3    0    0    0    1    0    0    0    0    0    0    0    0    0    0
## B2.C.4    0    0    0    1    0    0    0    0    0    0    0    0    0    0
## B2.N.1    0    0    0    0    1    0    0    0    0    0    0    0    0    0
## B2.N.2    0    0    0    0    1    0    0    0    0    0    0    0    0    0
## B2.N.3    0    0    0    0    1    0    0    0    0    0    0    0    0    0
## B2.N.4    0    0    0    0    1    0    0    0    0    0    0    0    0    0
## B2.P.1    0    0    0    0    0    1    0    0    0    0    0    0    0    0
## B2.P.2    0    0    0    0    0    1    0    0    0    0    0    0    0    0
## B2.P.3    0    0    0    0    0    1    0    0    0    0    0    0    0    0
## B2.P.4    0    0    0    0    0    1    0    0    0    0    0    0    0    0
## B3.C.1    0    0    0    0    0    0    1    0    0    0    0    0    0    0
## B3.C.2    0    0    0    0    0    0    1    0    0    0    0    0    0    0
## B3.C.3    0    0    0    0    0    0    1    0    0    0    0    0    0    0
## B3.N.1    0    0    0    0    0    0    0    1    0    0    0    0    0    0
## B3.N.2    0    0    0    0    0    0    0    1    0    0    0    0    0    0
## B3.N.3    0    0    0    0    0    0    0    1    0    0    0    0    0    0
## B3.P.1    0    0    0    0    0    0    0    0    1    0    0    0    0    0
## B3.P.2    0    0    0    0    0    0    0    0    1    0    0    0    0    0
## B3.P.3    0    0    0    0    0    0    0    0    1    0    0    0    0    0
## B4.C.1    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## B4.C.2    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## B4.C.3    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## B4.C.4    0    0    0    0    0    0    0    0    0    1    0    0    0    0
## B4.N.1    0    0    0    0    0    0    0    0    0    0    1    0    0    0
## B4.N.2    0    0    0    0    0    0    0    0    0    0    1    0    0    0
## B4.N.3    0    0    0    0    0    0    0    0    0    0    1    0    0    0
## B4.P.1    0    0    0    0    0    0    0    0    0    0    0    1    0    0
## B4.P.2    0    0    0    0    0    0    0    0    0    0    0    1    0    0
## B4.P.3    0    0    0    0    0    0    0    0    0    0    0    1    0    0
## B5.C.1    0    0    0    0    0    0    0    0    0    0    0    0    1    0
## B5.C.2    0    0    0    0    0    0    0    0    0    0    0    0    1    0
## B5.C.3    0    0    0    0    0    0    0    0    0    0    0    0    1    0
## B5.C.4    0    0    0    0    0    0    0    0    0    0    0    0    1    0
## B5.N.1    0    0    0    0    0    0    0    0    0    0    0    0    0    1
## B5.N.2    0    0    0    0    0    0    0    0    0    0    0    0    0    1
## B5.N.3    0    0    0    0    0    0    0    0    0    0    0    0    0    1
## B5.N.4    0    0    0    0    0    0    0    0    0    0    0    0    0    1
## B5.P.1    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## B5.P.2    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## B5.P.3    0    0    0    0    0    0    0    0    0    0    0    0    0    0
## B5.P.4    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##        B5.P
## B1.C.1    0
## B1.C.2    0
## B1.C.3    0
## B1.C.4    0
## B1.N.1    0
## B1.N.2    0
## B1.N.3    0
## B1.N.4    0
## B1.P.1    0
## B1.P.2    0
## B1.P.3    0
## B1.P.4    0
## B2.C.1    0
## B2.C.2    0
## B2.C.3    0
## B2.C.4    0
## B2.N.1    0
## B2.N.2    0
## B2.N.3    0
## B2.N.4    0
## B2.P.1    0
## B2.P.2    0
## B2.P.3    0
## B2.P.4    0
## B3.C.1    0
## B3.C.2    0
## B3.C.3    0
## B3.N.1    0
## B3.N.2    0
## B3.N.3    0
## B3.P.1    0
## B3.P.2    0
## B3.P.3    0
## B4.C.1    0
## B4.C.2    0
## B4.C.3    0
## B4.C.4    0
## B4.N.1    0
## B4.N.2    0
## B4.N.3    0
## B4.P.1    0
## B4.P.2    0
## B4.P.3    0
## B5.C.1    0
## B5.C.2    0
## B5.C.3    0
## B5.C.4    0
## B5.N.1    0
## B5.N.2    0
## B5.N.3    0
## B5.N.4    0
## B5.P.1    1
## B5.P.2    1
## B5.P.3    1
## B5.P.4    1
pheatmap::pheatmap(datTraits, cluster_rows = F, cluster_cols = F, main = "Trait matrix",
                   display_numbers = T, number_format = "%.0f", legend = F)

traitColors <- numbers2colors(datTraits, signed = F)

plotDendroAndColors(sampleTree, traitColors,
                    groupLabels = names(datTraits),
                    main = paste0("Sample dendrogram of filtered top",nrow(mat.test)," highly variable genes, using log2(TPM.crt+1) as input"))

network - module

powers <- c(c(1:50))
powers
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
sft <- pickSoftThreshold(data = datExpr,
                         powerVector = powers, verbose = 5,
                         networkType = "signed")
## pickSoftThreshold: will use block size 6269.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 6269 of 7136
##    ..working on genes 6270 through 7136 of 7136
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.9130  4.140          0.889 4090.00  4270.000 4830.0
## 2      2   0.5970  1.170          0.659 2600.00  2720.000 3630.0
## 3      3   0.0529  0.223          0.485 1770.00  1800.000 2890.0
## 4      4   0.0429 -0.183          0.584 1250.00  1230.000 2370.0
## 5      5   0.2430 -0.446          0.713  923.00   854.000 1990.0
## 6      6   0.4550 -0.662          0.804  697.00   604.000 1700.0
## 7      7   0.5890 -0.844          0.845  538.00   436.000 1470.0
## 8      8   0.6720 -0.964          0.881  422.00   326.000 1290.0
## 9      9   0.7230 -1.050          0.904  337.00   251.000 1130.0
## 10    10   0.7620 -1.130          0.918  272.00   198.000 1000.0
## 11    11   0.7940 -1.190          0.936  222.00   156.000  896.0
## 12    12   0.8240 -1.240          0.950  184.00   123.000  803.0
## 13    13   0.8100 -1.310          0.932  153.00    97.700  724.0
## 14    14   0.8090 -1.360          0.928  128.00    78.000  655.0
## 15    15   0.8160 -1.390          0.932  109.00    62.500  595.0
## 16    16   0.8300 -1.410          0.940   92.60    50.200  542.0
## 17    17   0.8440 -1.420          0.951   79.40    40.900  495.0
## 18    18   0.8580 -1.420          0.958   68.40    33.200  454.0
## 19    19   0.8600 -1.440          0.957   59.30    27.100  417.0
## 20    20   0.8710 -1.450          0.964   51.60    22.200  384.0
## 21    21   0.8860 -1.450          0.973   45.20    18.200  355.0
## 22    22   0.8890 -1.450          0.974   39.70    14.900  328.0
## 23    23   0.8970 -1.460          0.978   35.00    12.400  304.0
## 24    24   0.9030 -1.450          0.982   31.00    10.300  282.0
## 25    25   0.9090 -1.450          0.985   27.50     8.580  263.0
## 26    26   0.9150 -1.460          0.988   24.50     7.140  245.0
## 27    27   0.9150 -1.460          0.987   21.90     6.030  228.0
## 28    28   0.9190 -1.460          0.990   19.60     5.050  213.0
## 29    29   0.9210 -1.460          0.989   17.70     4.240  199.0
## 30    30   0.9250 -1.460          0.991   15.90     3.600  187.0
## 31    31   0.9290 -1.460          0.992   14.40     3.050  175.0
## 32    32   0.9320 -1.460          0.993   13.00     2.570  165.0
## 33    33   0.9380 -1.450          0.996   11.80     2.180  155.0
## 34    34   0.9410 -1.450          0.996   10.80     1.870  146.0
## 35    35   0.9440 -1.440          0.997    9.82     1.600  137.0
## 36    36   0.9410 -1.450          0.994    8.97     1.370  130.0
## 37    37   0.9420 -1.440          0.996    8.21     1.160  123.0
## 38    38   0.9440 -1.440          0.997    7.53     0.988  117.0
## 39    39   0.9430 -1.440          0.995    6.91     0.841  111.0
## 40    40   0.9410 -1.450          0.995    6.36     0.719  106.0
## 41    41   0.9370 -1.450          0.992    5.86     0.617  101.0
## 42    42   0.9400 -1.450          0.994    5.41     0.528   96.2
## 43    43   0.9390 -1.460          0.993    5.00     0.452   91.8
## 44    44   0.9410 -1.460          0.994    4.63     0.393   87.6
## 45    45   0.9330 -1.470          0.988    4.29     0.340   83.7
## 46    46   0.9290 -1.470          0.988    3.99     0.294   80.0
## 47    47   0.9290 -1.470          0.989    3.71     0.257   76.5
## 48    48   0.9320 -1.470          0.991    3.45     0.222   73.2
## 49    49   0.9350 -1.470          0.992    3.21     0.194   70.1
## 50    50   0.9270 -1.480          0.986    3.00     0.167   67.2
par(mfrow = c(2,1))
cex1 = 0.9

#
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, Signed R^2", type = "n",
     main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels = powers, cex = cex1, col = "red")
abline(h=0.9, col="red")

# 
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab = "Soft Threshold (power)", ylab = "Mean Connectivity", type = "n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels = powers,)

Co-expression similarity and adjacency
softPower <- 30
adjacency <- adjacency(datExpr = datExpr,
                       power = softPower,
                       type = "signed")
Topological Overlap Matrix (TOM)
TOM <- TOMsimilarity(adjMat = adjacency, TOMType = "signed")
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
dissTOM <- 1 - TOM
CLustering using TOM
geneTree <- hclust(as.dist(dissTOM), method = "average")
#geneTree <- hclust(as.dist(dissTOM), method = "ward.D2")

#
plot(geneTree, xlab = "", sub = "", main = "Gene clustering on TOM-based dissimilarity",
     labels = FALSE, hang = 0.04)

# the authors like large modules, so set minimum module size relatively high
minModuleSize = 30

#
dynamicMods <- cutreeDynamic(dendro = geneTree,
                             distM = dissTOM,
                             method = "hybrid",
                             deepSplit = 2,
                             pamRespectsDendro = FALSE,
                             minClusterSize = minModuleSize)
##  ..cutHeight not given, setting it to 0.998  ===>  99% of the (truncated) height range in dendro.
##  ..done.
table(dynamicMods)
## dynamicMods
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
##  996 1929 1109  433  375  304  298  277  252  227  179  177  139  121  108   98 
##   16   17 
##   69   45
#
dynamicColors <- labels2colors(dynamicMods)
table(dynamicColors)[match(table(dynamicMods),table(dynamicColors))]
## dynamicColors
##         grey    turquoise         blue        brown       yellow        green 
##          996         1929         1109          433          375          304 
##          red        black         pink      magenta       purple  greenyellow 
##          298          277          252          227          179          177 
##          tan       salmon         cyan midnightblue    lightcyan       grey60 
##          139          121          108           98           69           45
#
plotDendroAndColors(geneTree,dynamicColors, "Dynamic Tree Cut",
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Gene dendrogram and module colors")

merging of modules whose expression profiles are very similar
# Calculate eigengenes         
MEList <- moduleEigengenes(datExpr, colors = dynamicColors)
MEs <- MEList$eigengenes

# dissimilarity of module eigengenes
#MEDiss <- 1 - cor(MEs)
MEDiss <- 1 - cor(MEs)

# cluster module eigengenes
#METree <- hclust(as.dist(MEDiss),method = "average")
METree <- hclust(as.dist(MEDiss),method = "ward.D2")
#
plot(METree, main = "Clustering of module eigengenes",
     xlab = "", sub = "")
abline(h=0.15, col="red")
abline(h=0.17, col="red")
abline(h=0.2, col="red")

choose a height cut ,corresponding to correlation, to merge

MEDissThres <- 0.17

#
merge <- mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
##  mergeCloseModules: Merging modules whose distance is less than 0.17
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 18 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 10 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 9 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 9 module eigengenes in given set.
mergedColors <- merge$colors
mergedMEs <- merge$newMEs
#
plotDendroAndColors(geneTree,
                    cbind(dynamicColors, mergedColors),
                    c("Dynamic Tree Cut", "Merged dynamic"),
                    dendroLabels = FALSE, hang = 0.03,
                    addGuide = TRUE, guideHang = 0.05,
                    main = "Gene dendrogram and module colors")

# rename
moduleColors <- mergedColors

#
colorOrder <- c("grey", standardColors(50))
moduleLabels <- match(moduleColors, colorOrder)-1
MEs <- mergedMEs
standardColors(50)
##  [1] "turquoise"       "blue"            "brown"           "yellow"         
##  [5] "green"           "red"             "black"           "pink"           
##  [9] "magenta"         "purple"          "greenyellow"     "tan"            
## [13] "salmon"          "cyan"            "midnightblue"    "lightcyan"      
## [17] "grey60"          "lightgreen"      "lightyellow"     "royalblue"      
## [21] "darkred"         "darkgreen"       "darkturquoise"   "darkgrey"       
## [25] "orange"          "darkorange"      "white"           "skyblue"        
## [29] "saddlebrown"     "steelblue"       "paleturquoise"   "violet"         
## [33] "darkolivegreen"  "darkmagenta"     "sienna3"         "yellowgreen"    
## [37] "skyblue3"        "plum1"           "orangered4"      "mediumpurple3"  
## [41] "lightsteelblue1" "lightcyan1"      "ivory"           "floralwhite"    
## [45] "darkorange2"     "brown4"          "bisque4"         "darkslateblue"  
## [49] "plum2"           "thistle2"
scales::show_col(standardColors(50))

table(dynamicMods)
## dynamicMods
##    0    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
##  996 1929 1109  433  375  304  298  277  252  227  179  177  139  121  108   98 
##   16   17 
##   69   45
length(table(dynamicMods))
## [1] 18
table(moduleLabels)
## moduleLabels
##    0    2    4    5    8   12   14   16   17 
##  996 1286  375  700 3070  487  108   69   45
length(table(moduleLabels))
## [1] 9
table(moduleColors)
## moduleColors
##      blue      cyan     green      grey    grey60 lightcyan      pink       tan 
##      1286       108       700       996        45        69      3070       487 
##    yellow 
##       375
table(moduleColors)[c(1,7,2,6,8,5,3,9,4)]
## moduleColors
##      blue      pink      cyan lightcyan       tan    grey60     green    yellow 
##      1286      3070       108        69       487        45       700       375 
##      grey 
##       996
data.frame(table(moduleColors)[c(1,7,2,6,8,5,3,9,4)])
##   moduleColors Freq
## 1         blue 1286
## 2         pink 3070
## 3         cyan  108
## 4    lightcyan   69
## 5          tan  487
## 6       grey60   45
## 7        green  700
## 8       yellow  375
## 9         grey  996
moduleColors.new <- moduleColors


moduleColors.new[moduleColors.new == "blue"] <- "purple"
moduleColors.new[moduleColors.new == "pink"] <- "blue"
moduleColors.new[moduleColors.new == "cyan"] <- "darkorange"

moduleColors.new[moduleColors.new == "green"] <- "darkgreen"
moduleColors.new[moduleColors.new == "yellow"] <- "green"
moduleColors.new[moduleColors.new == "lightcyan"] <- "yellow"

moduleColors.new[moduleColors.new == "tan"] <- "turquoise"
moduleColors.new[moduleColors.new == "grey60"] <- "yellowgreen"

moduleColors.new[moduleColors.new == "grey"] <- "grey"

moduleColors.new <- factor(moduleColors.new,
                           levels = c("blue","purple",
                                      "darkorange","yellow",
                                      "yellowgreen",
                                      "green","darkgreen","turquoise",
                                      "grey"))
data.frame(table(moduleColors.new))
##   moduleColors.new Freq
## 1             blue 3070
## 2           purple 1286
## 3       darkorange  108
## 4           yellow   69
## 5      yellowgreen   45
## 6            green  375
## 7        darkgreen  700
## 8        turquoise  487
## 9             grey  996
# 
nSamples <- nrow(datTraits)

# Recalculate MEs with color labels      
#MEs0 <- moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs0 <- moduleEigengenes(datExpr, moduleColors.new)$eigengenes
#MEs <- orderMEs(MEs0)
MEs <- MEs0

moduleTraitCor <- cor(MEs, datTraits, use = "p")
#moduleTraitCor <- bicor(MEs, datTraits, use = "p", robustY = F)
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor, nSamples)
#
textMatrix <- paste(signif(moduleTraitCor, 2), "\n(",
                    signif(moduleTraitPvalue, 1), ")", sep = "")
dim(textMatrix) <- dim(moduleTraitCor)
par(mar = c(6, 10.5, 3, 3))
module_order <- names(MEs) %>% gsub("ME","",.)
#
labeledHeatmap(Matrix = moduleTraitCor,
               xLabels = names(datTraits),
               yLabels = names(MEs),
               ySymbols = module_order,
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               textMatrix = textMatrix,
               setStdMargins = FALSE,
               cex.text = 0.5,
               cex.lab = 0.8, 
               zlim = c(-1,1),
               main = paste("Module-trait relationships"))

par(mar = c(6, 10.5, 3, 3))
module_order <- names(MEs) %>% gsub("ME","",.)
#
idx.xlable <- c(4,10,1,7,11,13,
                14,
                2,3,12,15,8,9,5,6
                )

color.ttt <- colorRampPalette(
  c(
    "#03047F", # deep blue
    "white",
    "#CC2627"  # deep red
  )
)(100)

labeledHeatmap(Matrix = moduleTraitCor[,idx.xlable],
               xLabels = names(datTraits)[idx.xlable],
               yLabels = names(MEs),
               ySymbols = module_order,
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               #colors = color.ttt,
               textMatrix = textMatrix[,idx.xlable],
               setStdMargins = FALSE,
               cex.text = 0.5,
               cex.lab = 0.8, 
               zlim = c(-0.95,0.95),
               main = paste("Module-trait relationships"))

par(mar = c(6, 10.5, 3, 3))
module_order <- names(MEs) %>% gsub("ME","",.)
#
idx.xlable1 <- c(1,2,3,
                 10,11,12,13,14,15,
                 7,8,9,
                 4,5,6
                )

color.ttt <- colorRampPalette(
  c(
    "#03047F", # deep blue
    "white",
    "#CC2627"  # deep red
  )
)(100)

labeledHeatmap(Matrix = moduleTraitCor[,idx.xlable1],
               xLabels = names(datTraits)[idx.xlable1],
               yLabels = names(MEs),
               ySymbols = module_order,
               colorLabels = FALSE,
               colors = blueWhiteRed(50),
               #colors = color.ttt,
               textMatrix = textMatrix[,idx.xlable1],
               setStdMargins = FALSE,
               cex.text = 0.5,
               cex.lab = 0.8, 
               zlim = c(-0.95,0.95),
               main = paste("Module-trait relationships"))

results

module-gene list

for GO enrichment

rownames(mat.test)[moduleColors.new=="darkorange"]
##   [1] "Ctsd"      "C2"        "Ctss"      "Tyrobp"    "C1qb"      "C1qa"     
##   [7] "Ctsz"      "Itm2b"     "Trem2"     "Cd9"       "Ctsl"      "Serpine2" 
##  [13] "Grn"       "Cd63"      "Cd68"      "Hexa"      "Ctsa"      "Syngr1"   
##  [19] "Ly86"      "Lag3"      "Pld3"      "Creg1"     "Gusb"      "Ssr4"     
##  [25] "Lrpap1"    "Cd34"      "Lamp2"     "Glmp"      "Bsg"       "Cadm1"    
##  [31] "Tpp1"      "Ergic3"    "Slc11a1"   "Cd84"      "Vkorc1"    "Erp29"    
##  [37] "Mt1"       "Mpc1"      "Ifnar2"    "Tmed3"     "Sdf4"      "Grcc10"   
##  [43] "Naglu"     "Sdf2l1"    "Rnasek"    "Dpp7"      "Atp6ap1"   "Glb1"     
##  [49] "Fkbp2"     "Aplp2"     "Nrp1"      "Ramp1"     "Nucb1"     "Gaa"      
##  [55] "Pebp1"     "Fuca1"     "Lipa"      "Atp6v1c1"  "Psat1"     "Renbp"    
##  [61] "Dusp3"     "Napa"      "Colgalt1"  "Dnase2a"   "Ank"       "Klhl6"    
##  [67] "Fam20c"    "Gde1"      "Pdcd1"     "Gba"       "Os9"       "Siglecf"  
##  [73] "Atxn10"    "Apoc4"     "Speg"      "Fgf13"     "C1galt1c1" "Nceh1"    
##  [79] "Echs1"     "Ephx1"     "Slc25a4"   "Apbb2"     "Cyb5r1"    "Sil1"     
##  [85] "Crtap"     "Ddx31"     "Ogfod3"    "St14"      "Ggh"       "Ttyh2"    
##  [91] "Plekhm2"   "Scarb2"    "Rgs3"      "Tmem191c"  "Cacna1a"   "Chst2"    
##  [97] "Olfr110"   "Fam3c"     "Apmap"     "Retsat"    "Olfr111"   "Arhgap24" 
## [103] "Tcim"      "Ptchd1"    "Sord"      "Mrgpre"    "Enpp1"     "Dnajb14"

module-correlation

par(cex = 0.9)

plotEigengeneNetworks(MEs, "",
                      marDendro = c(0,4.5,1,3),
                      marHeatmap = c(5,4.5,1,3),
                      cex.lab = 0.8, 
                      xLabelsAngle = 90)

check DEGs overlap

DEG.df <- list(B1.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B1.P_vs_C.csv"),
               B2.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B2.P_vs_C.csv"),
               B3.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B3.P_vs_C.csv"),
               B4.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B4.P_vs_C.csv"),
               B5.PvsC = read.csv("../analysis_DEGs.new202310/edgeR_DEGs.cnt.B5.P_vs_C.csv"))
DEG.df <- lapply(DEG.df, function(x){
  rownames(x) <- x$gene
  x
})
lapply(DEG.df, head)
## $B1.PvsC
##        gene log2FoldChange    logCPM        LR        pvalue          padj
## Cst7   Cst7       8.495187 11.329086 2311.8046  0.000000e+00  0.000000e+00
## Apoe   Apoe       5.700928 15.207754 1847.1664  0.000000e+00  0.000000e+00
## Lpl     Lpl       6.496993  9.587738  829.5032 2.078124e-182 6.868199e-179
## Igf1   Igf1       8.259064  7.996613  814.5473 3.709031e-179 9.193760e-176
## Chst2 Chst2       5.343060  7.790534  645.9321 1.713193e-142 3.397261e-139
## Ch25h Ch25h       8.205729  7.737993  637.0509 1.463288e-140 2.418084e-137
##              FC
## Cst7  360.83293
## Apoe   52.01760
## Lpl    90.32119
## Igf1  306.35565
## Chst2  40.59021
## Ch25h 295.23683
## 
## $B2.PvsC
##          gene log2FoldChange   logCPM       LR        pvalue          padj
## Iigp1   Iigp1       7.947249 8.247172 942.7679 4.952481e-207 5.463578e-203
## Cxcl10 Cxcl10       6.929976 7.883841 877.2153 8.810742e-193 4.860005e-189
## Cfb       Cfb      10.448006 9.826880 864.0345 6.463601e-190 2.376881e-186
## Gbp2     Gbp2       6.933162 8.868324 839.2066 1.614679e-184 4.453283e-181
## Cd72     Cd72       5.662713 8.773877 806.2563 2.354160e-177 5.194218e-174
## Tgtp2   Tgtp2       6.331374 8.223505 805.0476 4.311557e-177 7.927516e-174
##                FC
## Iigp1   246.80858
## Cxcl10  121.93564
## Cfb    1396.89283
## Gbp2    122.20517
## Cd72     50.65782
## Tgtp2    80.52548
## 
## $B3.PvsC
##        gene log2FoldChange   logCPM       LR pvalue padj        FC
## Cfb     Cfb       9.354500 7.870005 1549.713      0    0 654.61377
## C4b     C4b       7.525886 9.643096 1842.803      0    0 184.29670
## H2-Aa H2-Aa       7.515011 9.346702 1945.710      0    0 182.91262
## H2-Q7 H2-Q7       7.234406 9.122856 2353.472      0    0 150.58209
## Cst7   Cst7       6.586956 9.015511 1820.812      0    0  96.13272
## H2-Q6 H2-Q6       6.291742 8.339651 1727.806      0    0  78.34350
## 
## $B4.PvsC
##        gene log2FoldChange    logCPM        LR        pvalue          padj
## Spp1   Spp1       7.561344  8.255223 1247.4543 2.967059e-273 2.893476e-269
## Axl     Axl       5.138007  8.389047 1077.0282 3.248776e-236 1.584103e-232
## Itgax Itgax       4.908976  7.567481  812.4350 1.067804e-178 3.471074e-175
## Cst7   Cst7       5.148687  7.641870  718.1918 3.310198e-158 8.070262e-155
## Apoe   Apoe       5.080873 14.998443  704.7216 2.811618e-155 5.483780e-152
## Mmp12 Mmp12       7.267412  7.091789  675.0327 8.037983e-149 1.306440e-145
##              FC
## Spp1  188.88233
## Axl    35.21229
## Itgax  30.04340
## Cst7   35.47393
## Apoe   33.84504
## Mmp12 154.06675
## 
## $B5.PvsC
##              gene log2FoldChange    logCPM       LR        pvalue          padj
## Axl           Axl       4.864722  7.834561 619.7363 8.533379e-137 8.786821e-133
## Apoe         Apoe       5.189902 13.327538 548.4807 2.694577e-121 1.387303e-117
## Lgals3bp Lgals3bp       3.540251 10.685313 529.3506 3.910460e-117 1.342200e-113
## Itgax       Itgax       3.313357  6.799122 480.0826 2.050056e-106 5.277358e-103
## H2-D1       H2-D1       3.074170 10.944743 454.6999 6.843673e-101  1.409386e-97
## Lyz2         Lyz2       3.058867  9.866918 423.7639  3.700029e-94  6.349867e-91
##                 FC
## Axl      29.135812
## Apoe     36.501950
## Lgals3bp 11.633803
## Itgax     9.940766
## H2-D1     8.422042
## Lyz2      8.333179
DEG.list.FC1.5 <- list(B1.Pup = proc_DEG(DEG.df$B1.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
                       B1.Cup = proc_DEG(DEG.df$B1.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene,
                       B2.Pup = proc_DEG(DEG.df$B2.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
                       B2.Cup = proc_DEG(DEG.df$B2.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene,
                       B3.Pup = proc_DEG(DEG.df$B3.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
                       B3.Cup = proc_DEG(DEG.df$B3.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene,
                       B4.Pup = proc_DEG(DEG.df$B4.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
                       B4.Cup = proc_DEG(DEG.df$B4.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene,
                       B5.Pup = proc_DEG(DEG.df$B5.PvsC, p.cut = 0.05, FC.cut = 1.5, abs = F)$gene,
                       B5.Cup = proc_DEG(DEG.df$B5.PvsC, p.cut = 0.05, FC.cut = -1.5, abs = F)$gene)

DEG.list.FC2 <- list(B1.Pup = proc_DEG(DEG.df$B1.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
                       B1.Cup = proc_DEG(DEG.df$B1.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene,
                       B2.Pup = proc_DEG(DEG.df$B2.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
                       B2.Cup = proc_DEG(DEG.df$B2.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene,
                       B3.Pup = proc_DEG(DEG.df$B3.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
                       B3.Cup = proc_DEG(DEG.df$B3.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene,
                       B4.Pup = proc_DEG(DEG.df$B4.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
                       B4.Cup = proc_DEG(DEG.df$B4.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene,
                       B5.Pup = proc_DEG(DEG.df$B5.PvsC, p.cut = 0.05, FC.cut = 2, abs = F)$gene,
                       B5.Cup = proc_DEG(DEG.df$B5.PvsC, p.cut = 0.05, FC.cut = -2, abs = F)$gene)
library(UpSetR)
## Warning: package 'UpSetR' was built under R version 4.0.5
upset(fromList(DEG.list.FC1.5),
      order.by = 'freq', nsets = 10, point.size=3.5, line.size =1, text.scale = 2)
  
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=20))

upset(fromList(DEG.list.FC2),
      order.by = 'freq', nsets = 10, point.size=3.5, line.size =1, text.scale = 2)
  
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=20))

upset(fromList(DEG.list.FC1.5[c(1,3,5,7,9)]),
      order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.6)
  
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))

upset(fromList(DEG.list.FC1.5[c(2,4,6,8,10)]),
      order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.5)
  
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))

#
upset(fromList(DEG.list.FC2[c(1,3,5,7,9)]),
      order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.5)
  
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))

upset(fromList(DEG.list.FC2[c(2,4,6,8,10)]),
      order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.6)
  
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))

pdf("./result.new/DEG.overlap/DEG.overlap.PvsC.Pup.FC1.5.pdf", width = 9,height = 5.4)
upset(fromList(DEG.list.FC1.5[c(1,3,5,7,9)]),
      order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.3)
  
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
dev.off()
## png 
##   2
pdf("./result.new/DEG.overlap/DEG.overlap.PvsC.Cup.FC1.5.pdf", width = 9,height = 5.4)
upset(fromList(DEG.list.FC1.5[c(2,4,6,8,10)]),
      order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.3)
  
grid::grid.text("padj<0.05, |FC|>1.5",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
dev.off()
## png 
##   2
#
pdf("./result.new/DEG.overlap/DEG.overlap.PvsC.Pup.FC2.pdf", width = 9,height = 5.4)

upset(fromList(DEG.list.FC2[c(1,3,5,7,9)]),
      order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.3)
  
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
dev.off()
## png 
##   2
pdf("./result.new/DEG.overlap/DEG.overlap.PvsC.Cup.FC2.pdf", width = 9,height = 5.4)
upset(fromList(DEG.list.FC2[c(2,4,6,8,10)]),
      order.by = 'freq', nsets = 8, point.size=3.5, line.size =1, text.scale = 1.3)
  
grid::grid.text("padj<0.05, |FC|>2",x = 0.65, y=0.95, gp=grid::gpar(fontsize=15))
dev.off()
## png 
##   2

TOMplot

#
plotTOM <- dissTOM^7

#
diag(plotTOM) <- NA
TOMplot(plotTOM, geneTree, moduleColors.new, main = ("Network heatmap plot, all genes"), col=gplots::colorpanel(250,'darkred',"orange",'lemonchiffon'))

TOMplot(plotTOM, geneTree, moduleColors.new, main = ("Network heatmap plot, all genes"))

module-heatmap

output heatmap and matrix for each module

font.size <- 0.11/ (table(moduleColors.new) /2000)
font.size[font.size >2] <- font.size[font.size >2] +2.5
font.size[font.size <0.5] <- 0.5
font.size[font.size >10] <- 10

gg.xxx <- list()
for(xxx in names(table(moduleColors.new))){
gg.xxx[[xxx]] <- cowplot::plot_grid(
pheatmap::pheatmap(zscore_mat(log2(matt_pc.combine.crt[rownames(mat.test)[moduleColors.new==xxx],]+1)),cluster_rows = T, cluster_cols = F,
         main = paste0(xxx,", zscore of log2(TPM.crt +1)"), 
         color = color.test, 
         show_rownames = T, fontsize_row = font.size[xxx],
         #gaps_row = length(rets$up), 
         #gaps_row = 40, 
         silent = T,
         #gaps_col = length(Aidx),
         breaks = seq(-2,2,0.04))$gtable,

pheatmap::pheatmap(zscore_mat(log2(matt_pc.combine[rownames(mat.test)[moduleColors.new==xxx],]+1)),cluster_rows = T, cluster_cols = F,
         main = paste0(xxx,", zscore of log2(TPM +1)"), 
         color = color.test, 
         show_rownames = T, fontsize_row = font.size[xxx],
         #gaps_row = length(rets$up), 
         #gaps_row = 40, 
         silent = T,
         #gaps_col = length(Aidx),
         breaks = seq(-2,2,0.04))$gtable,
ncol=2)
}  
#gg.xxx
#gg.xxx
cowplot::plot_grid(
  plotlist = gg.xxx,
  ncol = 3
)

# get reordered pheatmap rownames from TPM.crt one                
#    see https://www.plob.org/article/16698.html

out0 <- pheatmap::pheatmap(zscore_mat(log2(matt_pc.combine.crt[rownames(mat.test)[moduleColors.new=="blue"],]+1)),cluster_rows = T, cluster_cols = F,
         main = paste0(xxx,", zscore of log2(TPM.crt +1)"), 
         color = color.test, fontsize = 25,
         show_rownames = T, fontsize_row = 0.5,
         #gaps_row = length(rets$up), 
         #gaps_row = 40, 
         silent = T,
         #gaps_col = length(Aidx),
         breaks = seq(-2,2,0.04))

head(rownames(mat.test)[moduleColors.new=="blue"][out0$tree_row$order],50)
##  [1] "Fancg"         "Mdn1"          "Lmbrd1"        "Tmub1"        
##  [5] "Wdr18"         "Kiz"           "Tmem237"       "Exoc7"        
##  [9] "Vps26c"        "Rad9a"         "Ufd1"          "Zmynd8"       
## [13] "Mettl6"        "Ccdc66"        "Mccc1"         "Smagp"        
## [17] "Itgb1"         "Unc119b"       "Slc35b4"       "Nr1d2"        
## [21] "Stam"          "9430015G10Rik" "Rnf123"        "B3galnt2"     
## [25] "Reep4"         "Nfs1"          "Fnta"          "Clec16a"      
## [29] "Mllt11"        "Zfp748"        "Rnaseh1"       "Atrip"        
## [33] "Cdk5rap1"      "Tnfrsf23"      "Nnt"           "Wdyhv1"       
## [37] "Mprip"         "Ncor2"         "Cep120"        "Mettl4"       
## [41] "Idh1"          "Rabgap1l"      "Fnbp4"         "Fpgt"         
## [45] "Ssr3"          "Plcg2"         "Lgals8"        "Rlim"         
## [49] "Rel"           "Arhgap30"
#save.image("I:/Shared_win/projects/20230211_microglia_module/analysis_new202310/test1.RData")